from google.colab import drive
drive.mount('/content/gdrive')
!nvidia-smi
%%shell
# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from glob import glob
import cv2
import matplotlib.pyplot as plt
import json
from pprint import pprint
from tqdm import tqdm_notebook as tqdm
from PIL import Image, ImageDraw
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor,FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.rpn import RPNHead
from torchvision.datasets import CocoDetection
import torch.optim as optim
from torchvision import transforms
import torch
from torch.utils.data import random_split
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import pycocotools
import utils
import transforms as T
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
# Select GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device: ', device)
# Root directory (on mounted Google Drive) that holds the training images.
PATH = '/content/gdrive/My Drive/Training_afeka_object_detection/images'
In order to build an object detection model, data is required — without it the model won't learn to detect anything. In this notebook we build a neural network for detecting face masks; to be precise, we train a model to detect whether a person is wearing a mask as required. Apart from the images, we didn't get any information about the locations and sizes of the masks we would like to identify, so we chose to use COCO Annotator — a great and simple tagging tool which lets you tag the objects (masks) and produces a json file with the desired label details in COCO format. Practical knowledge of this tool will allow us in the future to train models to detect any object we want.
# Load the COCO-style annotation file and collect, for every image record,
# its (file_name, id, height, width) tuple.
with open('/content/gdrive/My Drive/finished_total.json') as json_data: # Read annotations file
    data = json.load(json_data)
images = [(rec['file_name'], rec['id'], rec['height'], rec['width'])
          for rec in data['images']]
We manually extracted each image's annotations from the file; at the end we created a data frame so that we could continue the processing more easily and draw conclusions about the form of the training.
In the json file each image has a list with all of its bounding boxes. In the data frame we separated them so that each bounding box appears in its own row.
def get_dic_format(index, image_id, path, height, width, x_min, y_min, x_max, y_max, w, h, area, label):
    """Pack one bounding-box record into the flat dict used to build the DataFrame."""
    keys = ('index', 'image_id', 'path', 'height', 'width',
            'x_min', 'y_min', 'x_max', 'y_max', 'w', 'h', 'area', 'label')
    values = (index, image_id, path, height, width,
              x_min, y_min, x_max, y_max, w, h, area, label)
    return dict(zip(keys, values))
path = PATH + '/*'
train_paths = glob(path)
# Index annotations by image_id and image paths by file name once, instead of
# re-scanning both full lists for every single image (was O(images * paths *
# annotations) — accidental quadratic-plus behavior).
annos_by_image = {}
for anno in data['annotations']:
    annos_by_image.setdefault(anno['image_id'], []).append(anno)
path_by_name = {p.split('/')[-1]: p for p in train_paths}
new_data = []
for img in images:  # img = (file_name, id, height, width) from the json file
    p = path_by_name.get(img[0])
    if p is None:
        # Image listed in the json but not present on disk — skip, exactly as
        # the original name-matching loop did.
        continue
    none_bbox = False
    for dic in annos_by_image.get(img[1], []):
        box = dic['bbox']
        # NOTE(review): box[0]/box[1] are read as y/x here (swapped vs the
        # COCO [x, y, w, h] convention); the drawing code in add_bbox later
        # compensates with the same swapped order — kept as-is.
        y = int(box[0])
        x = int(box[1])
        h = int(box[2])
        w = int(box[3])
        if h * w != 0:  # skip degenerate zero-area boxes
            none_bbox = True
            new_data.append(get_dic_format(img[1], img[0],
                                           p, img[2], img[3],
                                           x, y, (x + w), (y + h),
                                           w, h, dic['area'], 1))
    if not none_bbox:
        # Image with no usable box gets a single all-zero row (label 0).
        new_data.append(get_dic_format(img[1], img[0],
                                       p, img[2], img[3],
                                       0, 0, 0, 0, 0, 0, 0, 0))
df = pd.DataFrame.from_dict(new_data)
display(df)
![celebration gif](https://media1.tenor.com/images/42983a95657f874f62cfc1f1152da484/tenor.gif)
# Display an animated gif inline in the notebook (purely decorative).
from IPython.display import Image
Image(url='https://media1.tenor.com/images/42983a95657f874f62cfc1f1152da484/tenor.gif')
# This is formatted as code
# Helper functions for statics part
def count_boxes(df):
    """Summarize mask annotations per image.

    Args:
        df: annotation DataFrame with 'image_id', 'path' and 'label' columns
            (one row per bounding box).

    Returns:
        tuple: (number of images containing at least one mask,
                list of paths of images with masks,
                list of paths of images without masks,
                average number of boxes per masked image — 0 when there are
                no masked images, instead of the original ZeroDivisionError).
    """
    with_mask = []
    without_mask = []
    masked_images = 0
    total_boxes = 0
    for name in df['image_id'].unique():
        mini_df = df[df['image_id'] == name]
        # All rows of one image share the same label, so the first row decides.
        if mini_df['label'].values[0] == 1:
            masked_images += 1
            # extend() instead of append(*...): the original crashed whenever
            # an image mapped to more than one unique path.
            with_mask.extend(mini_df['path'].unique())
            total_boxes += len(mini_df)
        else:
            without_mask.extend(mini_df['path'].unique())
    avg = total_boxes / masked_images if masked_images else 0
    return masked_images, with_mask, without_mask, avg
def avargae_height(df):
    """Average image height over all rows (name keeps the notebook's original spelling)."""
    return df.loc[:, 'height'].mean()
def avargae_width(df):
    """Average image width over all rows (name keeps the notebook's original spelling)."""
    return df.loc[:, 'width'].mean()
def avargae_area(df):
    """Average bounding-box area over all rows (name keeps the notebook's original spelling)."""
    return df.loc[:, 'area'].mean()
from PIL import Image, ImageDraw
def add_bbox(path, size=(600, 600)):
    """Open the image at *path*, draw all of its ground-truth boxes on it, and
    return the image resized to *size*.

    Relies on the global DataFrame ``df`` for the box coordinates.
    """
    image = Image.open(path)
    draw = ImageDraw.Draw(image)
    for anno in df[df['path'] == path][['x_min','y_min', 'x_max', 'y_max']].values:
        # NOTE(review): the indices look swapped ((anno[1], anno[0]) instead of
        # (anno[0], anno[1])) — this appears deliberate, compensating for the
        # y/x swap made when the dataframe was built from the COCO bboxes.
        draw.rectangle([(anno[1], anno[0]), (anno[3], anno[2])], outline ="blue", width =3)
    image = image.resize(size)
    return(image)
def plot_images(paths, title):
    """Show four randomly chosen images from *paths* side by side under *title*."""
    chosen = np.random.choice(paths, 4, replace=False)
    imgs = [add_bbox(p) for p in chosen]
    plt.figure(figsize=(20, 5))
    plt.suptitle(title, fontsize=20)
    for position, img in enumerate(imgs, start=1):
        plt.subplot(1, 5, position)
        plt.imshow(img)
        plt.axis('off')
    plt.show()
# Another decorative inline gif shown before the statistics cell.
from IPython.display import Image
Image(url='https://media1.tenor.com/images/42983a95657f874f62cfc1f1152da484/tenor.gif?itemid=8718500')
# BUGFIX: the result of count_boxes(df) was bound to the name `count_boxes`,
# destroying the function — re-running this cell would crash with
# "int is not callable". Bind the results to fresh names instead.
n_with_mask, with_mask, without_mask, avg_boxes = count_boxes(df)
total_length = len(df['image_id'].unique())
print(f'Total number of images {total_length}')
print(f'Total number of images with face mask {n_with_mask}')
print(f'Total number of images without face mask {total_length - n_with_mask}')
print(f'Avarge image height {avargae_height(df)}')
print(f'Avarge image width {avargae_width(df)}')
print(f'Avarge masks in image {avg_boxes}')
# Show a few examples from each group.
plot_images(with_mask, 'Exampels with face mask')
plot_images(without_mask, 'Exampels without face mask')
After examining the images we saw that on average there are between 3 and 4 face masks in each image. In addition, we noticed that in most of the pictures the number of people who don't wear a face mask is larger; we decided not to define these as "no mask" labels and instead treat this case as background. By taking this assumption we ignore the images without any face mask in them.
# remove negative examples — keep only rows labelled as a mask (label == 1)
df = df[df['label'] != 0]
display(df)
def get_annotations(annotations, device='cuda'):
    """Convert the COCO annotation list of one image into the target dict
    format expected by torchvision detection models, with tensors on *device*."""
    boxes = [to_rcnnformat(a['bbox']) for a in annotations]
    n = len(annotations)
    return {
        'boxes': torch.tensor(boxes).to(device),
        'labels': torch.tensor([1] * n).to(device),
        'image_id': torch.tensor(annotations[0]['image_id']).to(device),
        'area': torch.tensor([a['area'] for a in annotations]).to(device),
        'iscrowd': torch.tensor([0] * n).to(device),
    }
def to_rcnnformat(bbox):
    """Convert a COCO [x, y, w, h] box to the [x_min, y_min, x_max, y_max] form."""
    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
    return [x, y, x + w, y + h]
class FaceMaskDataset(Dataset):
    """ Face Mask dataset """
    def __init__(self, data, root="", transform=None, cocomode=True):
        """
        Args:
            data (dict): parsed COCO-style json with 'images' and 'annotations'.
            root (string): location of images.
            transform (callable, optional): optional transform applied to each sample.
            cocomode (bool): when True, __getitem__ returns the raw PIL image and
                annotation list; when False, both are converted to tensors on
                self.device.
        """
        self.images = data['images']
        # Group annotations by image_id in a single pass. The original dict
        # comprehension rescanned data['annotations'] once per image —
        # O(images * annotations) for what is a linear grouping job.
        grouped = {img['id']: [] for img in self.images}
        for anno in data['annotations']:
            if anno['image_id'] in grouped:
                grouped[anno['image_id']].append(anno)
        self.labels = grouped
        self.root = root
        self.imagetotensor = transforms.Compose([transforms.ToTensor()])
        self.transform = transform
        self.cocomode = cocomode
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    def __len__(self):
        # One sample per image record.
        return len(self.images)

    def __getitem__(self, idx):
        # Load the image from disk and look up its annotations by image id.
        im = Image.open(self.root + '/' + self.images[idx]['file_name']).convert('RGB')
        label_idx = self.images[idx]['id']
        label = self.labels[label_idx]
        if not self.cocomode:
            # Convert image and annotations to tensors for Faster R-CNN training.
            im = self.imagetotensor(im).to(self.device)
            label = get_annotations(label, self.device)
        if self.transform:
            im = self.transform(im)
        return im, label
def load_resnet(backbone_model, num_classes=2):
    """Build a Faster R-CNN on an FPN-wrapped, ImageNet-pretrained ResNet.

    backbone_model: one of 'resnet50', 'resnet101', 'resnet152'.
    num_classes: number of detection classes (background + mask).
    """
    # One (sizes, aspect_ratios) pair per FPN output level — 5 levels total.
    fpn_levels = 5
    anchor_generator = AnchorGenerator(
        sizes=tuple((32, 64, 128, 256) for _ in range(fpn_levels)),
        aspect_ratios=tuple((0.25, 0.5, 1.0, 2.0) for _ in range(fpn_levels)))
    backbone = resnet_fpn_backbone(backbone_model, True)  # pretrained backbone
    fasterrcnn = FasterRCNN(
        backbone, num_classes,
        rpn_anchor_generator=anchor_generator,
        rpn_head=RPNHead(256, anchor_generator.num_anchors_per_location()[0]))
    # Swap the classification head for one sized to our number of classes.
    in_features = fasterrcnn.roi_heads.box_predictor.cls_score.in_features
    fasterrcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=num_classes)
    return fasterrcnn
def get_model(architecture: str, num_classes=2):
    """Return a Faster R-CNN detection model built on the requested backbone.

    Args:
        architecture: 'resnet50' | 'resnet101' | 'resnet152' | 'mobilenet'
            | 'squeezenet'.
        num_classes: number of detection classes (background + mask = 2).

    Raises:
        ValueError: for an unrecognized architecture name (the original code
            fell through and crashed with a NameError on `backbone` instead).
    """
    # ResNet variants are handled by the FPN builder; the anchor generator
    # below is only needed for the plain-backbone branches (the original
    # built it unconditionally and then threw it away for resnets).
    if architecture in ('resnet50', 'resnet101', 'resnet152'):
        return load_resnet(architecture, num_classes)
    if architecture == 'mobilenet':
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
    elif architecture == 'squeezenet':
        backbone = torchvision.models.squeezenet1_0(pretrained=True).features
        backbone.out_channels = 512
    else:
        raise ValueError(f'unknown architecture: {architecture!r}')
    # One (sizes, aspect_ratios) pair for each of the 5 feature levels.
    anchor_generator = AnchorGenerator(
        sizes=tuple([(32, 64, 128, 256) for _ in range(5)]),
        aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
    fasterrcnn = FasterRCNN(
        backbone, num_classes,
        rpn_anchor_generator=anchor_generator,
        rpn_head=RPNHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0]))
    # Replace the pre-trained head with a new one sized for num_classes.
    in_features = fasterrcnn.roi_heads.box_predictor.cls_score.in_features
    fasterrcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=num_classes)
    return fasterrcnn
# Re-load the cleaned annotation file (images without annotations removed)
# for the actual training dataset.
with open('/content/gdrive/My Drive/all_without_inannotated.json') as json_data: # Read annotations file
    data = json.load(json_data)
def collate_fn(batch):
    """Transpose a batch of (image, target) pairs into (images, targets) tuples."""
    transposed = zip(*batch)
    return tuple(transposed)
mmasks_dataset = FaceMaskDataset(data, PATH, cocomode=False)
# Split ~80% / ~14% / ~5% train/val/test. The original hard-coded
# [150, 27, 10], which raises in random_split whenever the dataset does not
# contain exactly 187 images; computing the sizes keeps the same proportions
# (and yields exactly 150/27/10 for a 187-image dataset).
n_total = len(mmasks_dataset)
n_val = round(n_total * 27 / 187)
n_test = round(n_total * 10 / 187)
n_train = n_total - n_val - n_test
train_dataset, val_dataset, test_dataset = random_split(
    mmasks_dataset, [n_train, n_val, n_test])
# NOTE: the (misspelled) name `trian_data_loader` is used by run() below —
# kept for compatibility.
trian_data_loader = DataLoader(train_dataset, batch_size=4,
                               shuffle=True, collate_fn=collate_fn)
test_data_loader = DataLoader(test_dataset, batch_size=1,
                              shuffle=False, collate_fn=collate_fn)
val_data_loader = DataLoader(val_dataset, batch_size=1,
                             shuffle=False, collate_fn=collate_fn)
!pip install tensorboard==2.3.0
# TensorBoard writer used by run() below to log per-epoch losses and metrics.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
def train(model, optimizer, data_loader, device, epoch, print_freq):
    '''Run torchvision's train_one_epoch logic for one epoch and return the
    metric logger so the caller can read the training results.

    Args:
        model: detection model (returns a loss dict in train mode).
        optimizer: optimizer stepping the model parameters.
        data_loader: yields (images, targets) batches.
        device: device to move each batch to.
        epoch (int): current epoch index; epoch 0 gets LR warmup.
        print_freq (int): log every N iterations.
    '''
    # BUGFIX: `sys` is never imported at file level (and `math` only in a much
    # later cell), so the non-finite-loss guard below raised NameError instead
    # of stopping training cleanly. Import both locally.
    import math
    import sys
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    lr_scheduler = None
    if epoch == 0:
        # Warm the learning rate up over the first (up to) 1000 iterations.
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            # Abort on NaN/inf loss — continuing would corrupt the weights.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    return metric_logger
def evaluate_loss(data_loader):
    ''' The function calculates the loss error for a validation set '''
    # NOTE(review): relies on the globals `model` and `device` rather than
    # taking them as parameters — called after both exist at module level.
    # model.train() is intentional here: torchvision detection models only
    # return the loss dict in train mode; gradients stay disabled via no_grad().
    model.train()
    with torch.no_grad():
        losses_graph = []
        for images, targets in tqdm(data_loader):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            # Total loss for this batch, summed over the individual loss terms.
            losses = sum(loss for loss in loss_dict.values())
            losses_graph.append(float(losses.detach().cpu()))
        # Mean loss over all batches.
        loss = sum(losses_graph)/len(losses_graph)
    return loss
Latest research papers tend to give results for the COCO dataset only. In COCO mAP, a 101-point interpolated AP definition is used in the calculation. For COCO, AP is the average over multiple IoU (the minimum IoU to consider a positive match). AP@[.5:.95] corresponds to the average AP for IoU from 0.5 to 0.95 with a step size of 0.05. For the COCO competition, AP is the average over 10 IoU levels on 80 categories (AP@[.50:.05:.95]: start from 0.5 to 0.95 with a step size of 0.05). The following are some other metrics collected for the COCO dataset.

from tqdm import tqdm_notebook as tqdm
from engine import *
def run ():
    '''Conduct a complete training for one model.

    Uses the module-level globals: model, optimizer, lr_scheduler, num_epochs,
    trian_data_loader, val_data_loader, writer and device.

    Returns:
        dict: per-epoch 'loss', 'average precision' and 'average recall'
        under the keys 'train' and 'validition'.
    '''
    # NOTE(review): the key is spelled 'validition' throughout the notebook —
    # kept, because the plotting cells below index it by this exact name.
    model_metrics = {'train':
                     {'loss': [],
                      'average precision': [],
                      'average recall': []},
                     'validition':
                     {'loss': [],
                      'average precision': [],
                      'average recall': []}}
    for epoch in tqdm(range(num_epochs)):
        # train for one epoch, printing every 10 iterations
        stats = train(model, optimizer, trian_data_loader, device, epoch, print_freq=10)
        model_metrics['train']['loss'].append(stats.meters['loss'].value)
        eval_metrics = evaluate(model, trian_data_loader, device=device)
        # Per pycocotools' 12-entry stats ordering, stats[0] is AP@[.50:.95]
        # and stats[-4] corresponds to AR@100 (all areas).
        model_metrics['train']['average precision'].append(
            eval_metrics.coco_eval['bbox'].stats[0])
        model_metrics['train']['average recall'].append(
            eval_metrics.coco_eval['bbox'].stats[-4])
        # add to tensorboard
        writer.add_scalar("Loss/ Train", stats.meters['loss'].value, epoch)
        writer.add_scalar("Loss/loss_classifier/ Train", stats.meters['loss_classifier'].value, epoch)
        writer.add_scalar("Loss/loss_box_reg/ Train", stats.meters['loss_box_reg'].value, epoch)
        writer.add_scalar("Loss/loss_objectness/ Train", stats.meters['loss_objectness'].value, epoch)
        writer.add_scalar("Loss/loss_rpn_box_reg/ Train", stats.meters['loss_rpn_box_reg'].value, epoch)
        # evaluate on the validation dataset
        model_metrics['validition']['loss'].append(evaluate_loss(val_data_loader))
        eval_metrics = evaluate(model, val_data_loader, device=device)
        apt = eval_metrics.coco_eval['bbox'].stats[0]
        art = eval_metrics.coco_eval['bbox'].stats[-4]
        # add to graphs plotting section
        model_metrics['validition']['average precision'].append(apt)
        model_metrics['validition']['average recall'].append(art)
        # NOTE(review): these two tags say "Train" but log *validation*
        # metrics — probably meant "Validation"; left unchanged so existing
        # TensorBoard histories stay comparable.
        writer.add_scalar("Average Precision/ Train", apt, epoch)
        writer.add_scalar("Average Recall/ Train", art, epoch)
        # update the learning rate
        lr_scheduler.step()
    return model_metrics
To decide which model is best, we selected five candidate models. Each model's weights were initialized from pre-training on ImageNet.
The models were trained for 10 epochs. For the optimizer and LR-scheduler hyperparameters we used the ones from the TorchVision Object Detection Finetuning Tutorial. The results of the models were compared based on their average precision; the model which achieves the best precision will go on to further adjustments.
Models:
Optimizer = SGD(model.parameters(),
lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
compare_result = []
CHECKPOINT_DIR_PATH = '/content/gdrive/My Drive/Colab Notebooks/new_result.pt'
# Train each candidate backbone for 10 epochs with identical SGD settings
# and checkpoint the accumulated results after each run.
# ['resnet50', 'resnet101', 'resnet152', 'mobilenet', 'squeezenet']
for name in ['resnet50', 'resnet101', 'resnet152', 'mobilenet', 'squeezenet']:
    #Set model
    model = get_model(name)
    model.to(device)
    # let's train it for 10 epochs
    num_epochs = 10
    #Set Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    # Store (architecture name, metrics dict from run(), trained weights).
    compare_result.append((name, run(), model.state_dict()))
    # Checkpoint after every model so a crash doesn't lose earlier runs.
    torch.save(compare_result, CHECKPOINT_DIR_PATH)
    # Free GPU memory before building the next model.
    del model
res1 = torch.load('/content/gdrive/My Drive/Colab Notebooks/result resnet50 resnet101.pt')
res2 = torch.load('/content/gdrive/My Drive/Colab Notebooks/result2.pt')

def _plot_validation_metric(position, title, key):
    """Plot one validation metric for every stored result in subplot *position*."""
    plt.subplot(1, 3, position)
    plt.title(title, fontsize=20)
    for result in list(res1) + list(res2):
        label, details, _state = result[:3]
        plt.plot(details['validition'][key], label=label)
    plt.legend()

# The original repeated the same plotting loop six times (2 result files x
# 3 metrics); one helper covers all of them.
plt.figure(figsize=[20, 10])
_plot_validation_metric(1, 'Average Precision', 'average precision')
_plot_validation_metric(2, 'Average Recall', 'average recall')
_plot_validation_metric(3, 'Loss', 'loss')
plt.show()
From the graphs above we can see that resnet101 got the highest precision score, which means it is the selected architecture.
This network will undergo further adjustments and be trained again, but for now let's look at the predictions and the success of the network!
from PIL import Image, ImageDraw
def show_prediction(image, predictions, ground_truth=None, threshold=25):
    """Draw predicted boxes (with confidence banners) and optional
    ground-truth boxes on *image* and display the result.

    Args:
        image: CHW image tensor.
        predictions: model output; predictions[0] holds 'boxes' and 'scores'.
        ground_truth: iterable of [x_min, y_min, x_max, y_max] boxes,
            drawn in dark red.
        threshold: minimum confidence (percent) for a prediction to be drawn.
    """
    trans = transforms.ToPILImage()
    image = trans(image.detach().cpu())
    draw = ImageDraw.Draw(image)
    if ground_truth:
        for anno in ground_truth:
            draw.rectangle([(anno[0], anno[1]), (anno[2], anno[3])],
                           outline ="#6e090c", width =2)
    for anno, score in zip(predictions[0]['boxes'], predictions[0]['scores']):
        # BUGFIX: np.int / np.float were deprecated in NumPy 1.20 and removed
        # in 1.24 — use the builtin types instead.
        anno = anno.detach().cpu().numpy().astype(int)
        score = float(score.detach().cpu())
        rscore = np.round(score * 100, 2)
        if rscore > threshold:
            # BUGFIX: corners must be ordered (x0, y0), (x1, y1) — the
            # original passed them reversed, which modern Pillow rejects.
            draw.rectangle([(anno[0], anno[1]), (anno[2], anno[3])],
                           outline ="#bdffff", width =2)
            # Small filled banner just above the box for the confidence text.
            draw.rectangle([(anno[0], anno[1]-9),
                            (anno[0]+43, (anno[1]-2))],
                           fill ="#bdffff")
            draw.text((anno[0], anno[1]-10), f"{rscore} %", (0,0,0))
    display(image)
def test_predictions(model, dataloader, threshold=25):
    """Run *model* on every batch of *dataloader* and display the predictions
    together with the ground-truth boxes.

    Args:
        model: trained detection model (already on the global `device`).
        dataloader: yields (images, targets) batches (batch size 1 expected —
            only the first image of each batch is displayed).
        threshold: confidence threshold (percent) forwarded to show_prediction.
    """
    model.eval()  # hoisted out of the loop — no need to re-enter eval mode per batch
    # BUGFIX: iterate the `dataloader` parameter — the original looped over
    # the global test_data_loader, silently ignoring its argument.
    for sample_img, sample_ann in dataloader:
        imgs = list(img.to(device) for img in sample_img)
        # Flatten the ground-truth boxes of all targets in the batch.
        annotations = []
        for dic in sample_ann:
            for bbox in dic['boxes'].detach().cpu().numpy():
                annotations.append(bbox)
        result_dict = model(imgs)
        show_prediction(imgs[0], result_dict, annotations, threshold)
def load_best_model(name):
    """Rebuild architecture *name* and load its trained weights from the
    global comparison results `res1`.

    Returns:
        tuple: (model on `device`, recorded metrics dict for that run).

    Raises:
        ValueError: if *name* is not present in res1 — the original silently
            returned None, which then crashed with an opaque error at the
            unpacking call site.
    """
    for entry in res1:
        if entry[0] == name:
            model = get_model(entry[0])
            model.load_state_dict(entry[2])
            model.to(device)
            return model, entry[1]
    raise ValueError(f'no stored result named {name!r}')
# Load the winning architecture (resnet101) and preview its predictions
# before any further tuning ("dirty" = pre-adjustment model).
dirty_model, dirty_result = load_best_model('resnet101')
test_predictions(dirty_model, test_data_loader, 50)
AdaMod is a stochastic optimizer that restricts adaptive learning rates with adaptive and momental upper bounds. The dynamic learning rate bounds are based on the exponential moving averages of the adaptive learning rates themselves, which smooth out unexpected large learning rates and stabilize the training of deep neural networks.
AdaMod is a drop in replacement for Adam. The only change is a new hyperparameter called B3, or Beta3. This controls the degree of lookback for the long term clipping average.
A number of Schedulers were tested to accommodate some learning in training:
StepLR — which requires a lot of time to properly tune the scheduler hyperparameters.
Cosine learning decay — we first tried the cosine learning decay implementation from the PyTorch schedulers, but it reported a learning rate different from the optimizer's and could not be used (as mentioned in the linked GitHub issue). A manual cosine learning decay implementation solved the problem, achieving a moderate reduction of the learning rate throughout training while reporting the same learning rate values as the optimizer.
references
!pip install adamod
import adamod as adamod
from torch.optim.lr_scheduler import _LRScheduler
import math
# https://github.com/pytorch/pytorch/issues/17913
class LegacyCosineAnnealingLR(_LRScheduler):
    r"""Cosine-annealing learning-rate schedule (pre-1.1.0 PyTorch semantics).

    Each parameter group's learning rate follows

    .. math::
        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
        \cos(\frac{T_{cur}}{T_{max}}\pi))

    where :math:`\eta_{max}` is the initial lr and :math:`T_{cur}` counts
    epochs since the last restart. Only the annealing part of SGDR is
    implemented — no restarts. When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        T_max (int): Maximum number of iterations.
        eta_min (float): Minimum learning rate. Default: 0.
        last_epoch (int): The index of last epoch. Default: -1.

    Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`
    (https://arxiv.org/abs/1608.03983).
    """

    def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
        self.T_max = T_max
        self.eta_min = eta_min
        super(LegacyCosineAnnealingLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        # Cosine factor decays from 1 (epoch 0) to 0 (epoch T_max).
        scale = (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2
        return [self.eta_min + (base_lr - self.eta_min) * scale
                for base_lr in self.base_lrs]
CHECKPOINT_DIR_PATH = '/content/gdrive/My Drive/Colab Notebooks/resnet101v2.pt'
#Set model
model = get_model('resnet101')
# move model to the right device
model.to(device)
# let's train it for 10 epochs
num_epochs = 10
# Set Optimizer: AdaMod — Adam with adaptive and momental LR upper bounds.
optimizer = adamod.AdaMod(model.parameters(), lr=0.0001, weight_decay=0.0001, beta3=0.999)
# Learning rate scheduler.
# BUGFIX: the scheduler was constructed twice and eta_min patched onto the
# instance afterwards; construct it once and pass eta_min to the constructor.
lr_scheduler = LegacyCosineAnnealingLR(optimizer, T_max=num_epochs,
                                       eta_min=1.0000000000000002e-07)
compare_result = run()
# Persist both the training metrics and the trained weights.
torch.save([compare_result, model.state_dict()], CHECKPOINT_DIR_PATH)
# Reload the checkpoint and sanity-check the predictions on the test set.
a = torch.load('/content/gdrive/My Drive/Colab Notebooks/resnet101v2.pt')
model = get_model('resnet101')
model.load_state_dict(a[1])
model.to(device)
test_predictions(model, test_data_loader, 55)
# Validation AP before (SGD baseline) vs after (AdaMod + cosine annealing).
plt.plot(compare_result['validition']['average precision'], label='after')
plt.plot(dirty_result['validition']['average precision'], label='before')
plt.legend()
plt.show()
import torchvision
from torchvision.models import *
models_list = [alexnet,
               densenet121, googlenet, inception_v3,
               mobilenet_v2, resnet101, resnet152,
               resnet50, squeezenet1_0]
models_name = ['alexnet', 'densenet121', 'googlenet',
               'inception_v3', 'mobilenet_v2', 'resnet101',
               'resnet152', 'resnet50', 'squeezenet1_0']
find_max = 0
# Count trainable + non-trainable parameters per architecture and report
# the largest model.
# BUGFIX: the loop variable was named `model`, clobbering the trained
# detection model held in the global of the same name.
for model_fn, name in zip(models_list, models_name):
    t = model_fn(pretrained=True)
    pytorch_total_params = sum(p.numel() for p in t.parameters())
    print(f'{name}: {pytorch_total_params}')
    if pytorch_total_params > find_max:
        find_max = pytorch_total_params
        max_model_name = name
print(f'Max:\n{max_model_name}: {find_max}')
# Build a Faster R-CNN on top of an AlexNet feature extractor, to test the
# effect of backbone size on detection quality.
num_classes=2
anchor_generator = AnchorGenerator(
    sizes=tuple([(32, 64, 128, 256) for _ in range(5)]),
    aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
backbone = torchvision.models.alexnet(pretrained=True).features
# AlexNet's final conv block outputs 256 channels; FasterRCNN needs to know.
backbone.out_channels = 256
model = FasterRCNN(backbone, num_classes,
                   rpn_anchor_generator=anchor_generator,
                   rpn_head=RPNHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0]))
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes = num_classes)
CHECKPOINT_DIR_PATH = '/content/gdrive/My Drive/Colab Notebooks/alexnet.pt'
# move model to the right device
model.to(device)
# let's train it for 10 epochs
num_epochs = 10
# Set Optimizer — same AdaMod settings as the resnet101 run, for a fair comparison.
optimizer = adamod.AdaMod(model.parameters(), lr=0.0001, weight_decay=0.0001, beta3=0.999)
# Learning rate scheduler.
# BUGFIX: the scheduler was constructed twice with eta_min patched on
# afterwards; construct it once with eta_min passed to the constructor.
lr_scheduler = LegacyCosineAnnealingLR(optimizer, T_max=num_epochs,
                                       eta_min=1.0000000000000002e-07)
compare_result = run()
# Persist both the training metrics and the trained weights.
torch.save([compare_result, model.state_dict()], CHECKPOINT_DIR_PATH)
resnet = torch.load('/content/gdrive/My Drive/Colab Notebooks/resnet101v2.pt')
alexnet = torch.load('/content/gdrive/My Drive/Colab Notebooks/alexnet.pt')
plt.title('average precision', fontsize=20)
plt.plot(resnet[0]['validition']['average precision'], label='resnet 101')
# BUGFIX: both checkpoints were saved as [metrics, state_dict], so the
# alexnet metrics also live at index 0 — the original `alexnet['validition']`
# indexed the loaded list with a string and raised TypeError.
plt.plot(alexnet[0]['validition']['average precision'], label='alexnet')
plt.legend()
plt.show()
test_predictions(model, test_data_loader, 50)
As can be seen from the graph and the predictions, choosing an architecture with many features does not improve the model — in fact it hurts it and lowers the average precision: the model enters a state of overfitting. It fails to detect masks even when they appear close and clear in the image, while wrongly detecting other objects as face masks. The conclusion: of the models trained, the largest one achieved the worst result.